require("Hmisc")
require("plyr")
require("ggplot2")
require("car")
require("compositions")
options(digits=10)
options(scipen=10)
#setwd("/nfs/projects/k/kyu")
setwd("U:/gov2001/paper")

##################################################################################
## INSTRUCTIONS FOR DOWNLOADING DATA
##################################################################################
## 1) Go to https://wrds-web.wharton.upenn.edu/wrds/
## 2) Select "CRSP" from the drop-down list
## 3) Click on "Stock / Security Files"
## 4) CLick on "Monthly Stock File"
## 5) Step 1: Jan 1963 to Dec 2008
## 6) Step 2: Click on "Search the entire database"
## 7) Step 3: Select the following Columns:
##	* CRSP Permanent Company Number (PERMNO)
##	* Price (PRC)
##	* Holding Period Return (RET)
##	* Number of Shares Outstanding (SHROUT)
##	* Return on S&P Composite Index (sprtrn)
## 8) Step 4: csv file; date format=YYMMDDn8 (e.g. 19840725)
##
## Download market returns and risk-free rates from Ken French's website
## http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_Factors.zip
## Filter monthly data to between 196301 and 200812 before importing into R
## Convert to tab-delmited text file

##############################################################
## LOAD DATA
##############################################################

## Load CRSP data (month stock returns from Jan 1963 to Dec 2008)
## PERMNO, date, TICKER, PERMCO, PRC, RET, SHROUT, sprtrn
rm(list=ls())
data.load <- read.csv("crsp_data.csv",colClasses=c("numeric","numeric","character","numeric","numeric","character","numeric","numeric"))

## Load risk-free rates data from authors
## DATE, RMRF, SMB, HML, RF
data.rf <- read.delim("famafrench.csv",colClasses=c("numeric","numeric","numeric","numeric","numeric"))

##############################################################
## DATA PREPARATION
##############################################################

## PERMNO is the CRSP's unique identifier for a company
data <- data.frame(PERMNO=data.load[,"PERMNO"])

## Convert the dates into time periods (e.g. January 1963 = 1; February 1963=2, etc.)
YEAR <- as.integer(substr(data.load$date,1,4))
MONTH <- as.integer(substr(data.load$date,5,6))
data$PERIOD <- 12*(YEAR-1963) + MONTH

## The monthly return (deal with special codes)
data$RETURN <- as.numeric(data.load$RET)
if(nrow(data[data$RETURN<=-66,])>0) data[data$RETURN %in% c(-66,-77,-88,-99),"RETURN"] <- NA

## Calculate market cap
PRICE <- abs(data.load$PRC)
SHARES <- data.load$SHROUT*1000
data$CAP <- PRICE*SHARES

## Merge market and risk-free data with stock data
data.rf <- data.rf[order(data.rf$DATE),c("DATE","RMRF","RF")]/100
data.rf$PERIOD <- 1:nrow(data.rf)
data <- merge(data,data.rf,by.x="PERIOD",by.y="PERIOD",all.x=TRUE)
data$RETRF <- data$RETURN-data$RF
data$SP500 <- data.load$sprtrn

## Write prepared data to file
data <-  data[,!colnames(data) %in% c("PRICE","SHARES","DATE")]
data <- data[order(data$PERMNO,data$PERIOD),]
save(data,file="data.RData")

##############################################################
## FUNCTION THAT CALCULATES VOLATILITY & BETA FOR ONE PERIOD
##############################################################

## Read in prepared data from file
rm(list=ls())
load("data.RData")
data <- data[complete.cases(data),]

## period: 61 to 552. Period 1=Jan 1963; period 61=Jan 1968; period 552=Dec 1968
## lag.size: num of trailing months used to calc volat and beta (e.g. 60 mo)
## lag.min: drop stocks that don't have at least this many months of trailing data
## top.caps: set to 1000 to restrict to the top 1000 stock by market cap each month
get.volbeta <- function(period=61,lag.size=60,lag.min=24,top.caps=0) {
  ## period <- 61; lag.size <- 60; lag.min <- 24; top.caps <- 0
  lag.size <- period-lag.size
  ## Show progress
  print(paste("START PERIOD",period))
  
  ## The data for the current month
  data.current <- data[data$PERIOD==period,]
  ## 5-years of trailing data
  data.lag <- data[data$PERIOD>=lag.size & data$PERIOD<period,]

  ## Calculate volat and betas for prev 5 years
  data.calc <- ddply(data.lag,.(PERMNO),summarise,VOLAT=sd(RETURN,na.rm=T),
    BETA=suppressWarnings(tryCatch(coef(lm(RETURN~RMRF))[2],error=function(e) NA)),
    PERIOD=max(PERIOD),N=length(RETURN))

  ## Remove stocks that don't have sufficient data
  data.calc <- data.calc[data.calc$PERIOD==period-1 & data.calc$N>=lag.min,]
  data.calc <- data.calc[,colnames(data.calc)!="N"]

  ## Find market caps for the previous month
  data.calc <- merge(data.calc,data.lag[,c("PERMNO","PERIOD","CAP")],by.x=c("PERMNO","PERIOD"),by.y=c("PERMNO","PERIOD"),all=FALSE)
  
  ## Link current month stocks with their trailing data
  data.calc$PERIOD <- data.calc$PERIOD+1
  data.calc <- merge(data.calc,data.current[,c("PERMNO","PERIOD","RETURN")],by.x=c("PERMNO","PERIOD"),by.y=c("PERMNO","PERIOD"),all=FALSE)

  ## Remove NAs
  data.calc <- data.calc[complete.cases(data.calc),]
  
  ## Restrict to top market cap stocks if necessary
  if(top.caps>0) {
    caprank <- nrow(data.calc)-top.caps
    data.calc <- data.calc[rank(data.calc$CAP,ties="min")>caprank,]
  }

  ## Rank stocks by volatility and beta: 1 is lowest, 5 is highest
  data.calc$VOLATQ <- as.numeric(cut2(data.calc$VOLAT,g=5,digits=16))
  data.calc$BETAQ <- as.numeric(cut2(data.calc$BETA,g=5,digits=16))

  ## Results
  return(data.calc)
}

##############################################################
## RESULTS
##############################################################

## RUN FUNCTION FOR ALL MONTHS (61 to 552)
start <- 61
end <- 552
results <- data.frame()
for(i in start:end) {
  # All stocks
  results <- rbind(results,get.volbeta(period=i,lag.size=60,lag.min=24,top.caps=0))
  # Top 1000 stocks
  # results <- rbind(results,get.volbeta(period=i,lag.size=60,lag.min=24,top.caps=1000)) 
}
#load("results_data_all.RData")
ret.volat <- ddply(results,.(PERIOD,VOLATQ),summarise,RETURN=weighted.mean(RETURN,CAP))
ret.beta <- ddply(results,.(PERIOD,BETAQ),summarise,RETURN=weighted.mean(RETURN,CAP))
volat <- cbind(PERIOD=start:end,ddply(ret.volat,.(VOLATQ),summarise,CUMRET=cumprod(RETURN+1)))
beta <- cbind(PERIOD=start:end,ddply(ret.beta,.(BETAQ),summarise,CUMRET=cumprod(RETURN+1)))
volat[volat$PERIOD==end,"CUMRET"]
beta[beta$PERIOD==end,"CUMRET"]

## Save results to file
save(results,file="results_data_all.RData")
save(volat,file="results_volat_all.RData")
save(beta,file="results_beta_all.RData")
